#### Pollution and Regime Support: OLS Analysis
rm(list = ls())
### libraries and packages
pkg <- c("plyr", "dplyr", "tidyr", 
         "MASS", "multiwayvcov", 
         "stargazer", "lmtest", "doBy", "ggplot2", 
         "mediation", "vars", "DataCombine", "foreign",
         "mgcv", "ordinal", "reshape2", 
         "xtable", "sandwich", "rms")
lapply(pkg, require, character.only = TRUE)

# rid of E
options(scipen=999)
### setting wd for inputs
setwd("~/Dropbox/Pollution and Public Perceptions in China/Data and Analysis/Data")

# loading the data
load("ppp_cleaned")



#######################################
#######################################
## OLS Regressions on full sample - MODELS W/O COVARIATES
#######################################
#######################################

# OLS WITH parade + aqi.lagged + aqi.lagged2 + national 

l_sat <- lm(l_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(l_sat, p2$day)
coeftest(l_sat, m.vcov) 
se1 <- coeftest(l_sat, m.vcov)
NWvcov <- NeweyWest(l_sat, prewhite = FALSE, lag = NULL)
sen1 <- coeftest(l_sat, NWvcov) # coeftest with Newey-West vcov:  

c_sat <- lm(c_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(c_sat, p2$day)
coeftest(c_sat, m.vcov) 
se2 <- coeftest(c_sat, m.vcov)
NWvcov <- NeweyWest(c_sat, prewhite = FALSE, lag = NULL)
sen2 <- coeftest(c_sat, NWvcov) # coeftest with Newey-West vcov:  

l_watch <- lm(l_watch ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(l_watch, p2$day)
coeftest(l_watch, m.vcov) #  
se3 <- coeftest(l_watch, m.vcov)
NWvcov <- NeweyWest(l_watch, prewhite = FALSE, lag = NULL) # Newey-West vcov
sen3 <- coeftest(l_watch, NWvcov) # coeftest with Newey-West vcov:  

c_watch <- lm(c_watch ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(c_watch, p2$day)
coeftest(c_watch, m.vcov) 
se4 <- coeftest(c_watch, m.vcov)
NWvcov <- NeweyWest(c_watch, prewhite = FALSE, lag = NULL)
sen4 <- coeftest(c_watch, NWvcov) # coeftest with Newey-West vcov:  


anti_west <- lm(anti_west ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(anti_west, p2$day)
coeftest(anti_west, m.vcov) 
se6 <- coeftest(anti_west, m.vcov) 
NWvcov <- NeweyWest(anti_west, prewhite = FALSE, lag = NULL)
sen6 <- coeftest(anti_west, NWvcov) # coeftest with Newey-West vcov  .

day_pollute <- lm(day_pollute ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(day_pollute, p2$day)
coeftest(day_pollute, m.vcov) # 
se7 <- coeftest(day_pollute, m.vcov) #   
NWvcov <- NeweyWest(day_pollute, prewhite = FALSE, lag = NULL)
sen7 <- coeftest(day_pollute, NWvcov) # coeftest with Newey-West vcov: 

life_sat <- lm(life_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2) #  
m.vcov <- cluster.vcov(life_sat, p2$day)
coeftest(life_sat, m.vcov) #
se8 <- coeftest(life_sat, m.vcov)
NWvcov <- NeweyWest(life_sat, prewhite = FALSE, lag = NULL)
sen8 <- coeftest(life_sat, NWvcov) #  

######################
# MAKING TABLE - NO CONTROLS, NO NEWEY-WEST
######################
column.labels <- c("Local Government", "Central Government", "Local Oversight", "Central Oversight", 
                     "Anti-West")
cov.labs <- c("AQI", "AQI Lagged", "AQI Lagged 2", "Parade Period", "Holiday")
## TABLE A8
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/ols_no_control.tex")
stargazer(l_sat, c_sat, l_watch, c_watch,  anti_west, font.size = "large", digits = 4, 
          column.labels = column.labels, 
          covariate.labels = cov.labs,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(se1[,2], se2[,2], se3[, 2], se4[, 2],  se6[, 2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

######################
# MAKING TABLE - NO CONTROLS WITH NEWEY-WEST
######################
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/nwreg1.tex")
stargazer(l_sat, c_sat, l_watch, c_watch,  anti_west, day_pollute, font.size = "large", digits = 4, 
          column.labels = column.labels, dep.var.labels.include = FALSE,
          covariate.labels = cov.labs,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(sen1[,2], sen2[,2], sen3[, 2], sen4[, 2],  sen6[, 2], sen7[, 2]),
          header = F, notes = "Newey-West robust standard errors in parentheses", float = F)
sink()


#######################################
#######################################
## OLS Regressions on full sample - MODELS WITH COVARIATES
#######################################
#######################################


# aqi and government satisfaction at the local level
l_satc <- lm(l_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov1 <-cluster.vcov(l_satc, p2$day)  
coeftest(l_satc, m.vcov1) 
se11 <- coeftest(l_satc, m.vcov1)
#  
NWvcov <- NeweyWest(l_satc, lag = NULL, prewhite = FALSE)
sen11 <- coeftest(l_satc, NWvcov) 

# aqi and c_sat
c_satc <- lm(c_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(c_satc, p2$day)  
coeftest(c_satc, m.vcov25) 
se22 <- coeftest(c_satc, m.vcov25)
# robust at .066
NWvcov <- NeweyWest(c_satc, lag = NULL, prewhite = FALSE)
sen22 <- coeftest(c_satc, NWvcov) 

# aqi and the demand for more watchdog at the local level
l_watchc <- lm(l_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov13 <-cluster.vcov(l_watchc, p2$day)  
coeftest(l_watchc, m.vcov13) # coeftest
# robust p = .03313
se33 <- coeftest(l_watchc, m.vcov13)
NWvcov <- NeweyWest(l_watchc, lag = NULL, prewhite = FALSE)
sen33 <- coeftest(l_watchc, NWvcov) 

# aqi and the demand for more watchdog at the central level 
c_watchc <- lm(c_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov14 <-cluster.vcov(c_watchc, p2$day)  
coeftest(c_watchc, m.vcov14) 
# robust at .065
se44 <- coeftest(c_watchc, m.vcov14)
NWvcov <- NeweyWest(c_watchc, lag = NULL, prewhite = FALSE)
sen44 <- coeftest(c_watchc, NWvcov) 

# aqi and anti-west sentiment
anti_westc <- lm(anti_west
                 ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(anti_westc, p2$day)  
coeftest(anti_westc, m.vcov25) 
se66 <- coeftest(anti_westc, m.vcov25) 

NWvcov <- NeweyWest(anti_westc, lag = NULL, prewhite = FALSE)
sen66 <- coeftest(anti_westc, NWvcov) 

# day pollute
day_pollutec <- lm(day_pollute
                   ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                   + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(day_pollutec, p2$day)  
coeftest(day_pollutec, m.vcov25) 
se77 <- coeftest(day_pollutec, m.vcov25) 
 
NWvcov <- NeweyWest(day_pollutec, lag = NULL, prewhite = FALSE)
sen77 <- coeftest(day_pollutec, NWvcov)

# aqi and life stisfaction
life_satc <- lm(life_sat
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp + parade + national + age, data=p2)
m.vcov25 <-cluster.vcov(life_satc, p2$day)  
coeftest(life_satc, m.vcov25) 

se88 <- coeftest(life_satc, m.vcov25) 
NWvcov <- NeweyWest(life_satc, lag = NULL, prewhite = FALSE)
sen88 <- coeftest(life_satc, NWvcov) 

# covariate labels for the expanded list, 
cov.labs.c <-c("AQI", "AQI Lagged", "AQI Lagged 2",
               "Hukou Status", "Regime Insider", "SOE Employee", 
               "Education","Children","Married","Income", "Female", 
               "CCP Member","Parade Period","Holiday","Age")

####################
# THE REGRESSION TABLE IN THE PUBLISHED ARTICLE #
## TABLE 1
####################

sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/ols_control.tex")
stargazer(l_satc, c_satc, l_watchc, c_watchc,  anti_westc, font.size = "large", digits = 4, 
          column.labels = column.labels,
          covariate.labels = cov.labs.c,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(se11[,2], se22[,2], se33[, 2], se44[, 2],  se66[, 2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

####################
# MAKING TABLE FOR OLS W/ CONTROLS, AND NEWEY WEST
####################
## TABLE A15
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/nw_control.tex")
stargazer(l_satc, c_satc, l_watchc, c_watchc,  anti_westc, font.size = "large", digits = 4, 
          column.labels = column.labels, 
          covariate.labels = cov.labs.c,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(sen11[,2], sen22[,2], sen33[, 2], sen44[, 2], sen66[, 2]),
          header = F, notes = "Newey West robust standard errors in parentheses", float = F)
sink()

###############################
###### Making a figure for point estimates and confidence intervals obtained from the control model ######
###############################
## FIGURE A5
setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
pdf("figure_effect.pdf", height=6, width=8)
coef.vect <- c(se11[,1][2], se22[,1][2], se33[, 1][2], se44[, 1][2], se66[, 1][2])
sd.vect <- c(se11[,2][2], se22[,2][2], se33[, 2][2], se44[, 2][2],  se66[, 2][2])
longnames <- factor(c("Local government \n satisfaction", "Central government \n satisfaction", "Demand for public oversight \n over local government",
                      "Demand for public oversight \n over central government",
                       "Anti-West"),
                    levels=c("Anti-West", 
                             "Demand for public oversight \n over central government", "Demand for public oversight \n over local government",
                             "Central government \n satisfaction", "Local government \n satisfaction"))

frame = data.frame(longnames, coef.vect, sd.vect)

c <- ggplot(frame, aes(longnames, coef.vect))
c + geom_point(size=3) + geom_errorbar(aes(ymin=coef.vect-(qnorm(.975)*sd.vect),
                                           ymax=coef.vect+(qnorm(.975)*sd.vect), width=0)) + coord_flip() +
  geom_hline(yintercept=0, linetype="longdash") + theme_bw() +
  xlab("Dependent Variables") + ylab("Coefficients of Daily Pollution") +
  ggtitle("Effects of Pollution") +
  theme(plot.title = element_text(vjust = 1.3),
        axis.text.x = element_text(size=10))
dev.off()


###############################################
###############################################
######### OLS ON THE POST-parade SAMPLE ONLY
#  #########
# Now, see if the results hold
# after subsetting out everything before September 4th. (HARD TEST)
###############################################
###############################################

p2pp <- p2[which(p2$day > 21), ]
# government satisfaction at the local level and aqi
l_satct <- lm(l_sat
              ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
              + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(l_satct, p2pp$day)  
SE.1 <- coeftest(l_satct, m.vcov) # coeftest
# Robust
NWvcov <- NeweyWest(l_satct, lag = NULL, prewhite = FALSE)
coeftest(l_satct, NWvcov) 
sect1 <- coeftest(l_satct, m.vcov)

# government satisfaction at the central level and aqi
c_satct <- lm(c_sat
              ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
              + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(c_satct, p2pp$day)  
SE.2 <-coeftest(c_satct, m.vcov) # coeftest
# Robust
NWvcov <- NeweyWest(c_satct, lag = NULL, prewhite = FALSE)
coeftest(c_satct, NWvcov) 
sect2 <- coeftest(c_satct, m.vcov)

# aqi and the demand for more watchdog at the local level
l_watchct <- lm(l_watch
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(l_watchct, p2pp$day)  
SE.3 <- coeftest(l_watchct, m.vcov) # coeftest
# Robust 
NWvcov <- NeweyWest(l_watchct, lag = NULL, prewhite = FALSE)
coeftest(l_watchct, NWvcov) # 
sect3 <- coeftest(l_watchct, m.vcov)

# Central watchdog and pollution
c_watchct <- lm(c_watch
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(c_watchct, p2pp$day)  
SE.4 <- coeftest(c_watchct, m.vcov) # coeftest
# Robust
NWvcov <- NeweyWest(c_watchct, lag = NULL, prewhite = FALSE)
coeftest(c_watchct, NWvcov) # 
sect4 <- coeftest(c_watchct, m.vcov)

# Anti_west sentiments and pollution
anti_westct <- lm(anti_west
                  ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                  + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(anti_westct, p2pp$day)  
SE.6 <- coeftest(anti_westct, m.vcov) # sig
NWvcov <- NeweyWest(anti_westct, lag = NULL, prewhite = FALSE)
coeftest(anti_westct, NWvcov) # 
sect6 <- coeftest(anti_westct, m.vcov)

day_pollutect <- lm(day_pollute
                    ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                    + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(day_pollutect, p2pp$day)  
coeftest(day_pollutect, m.vcov) # coeftest
# Again not robust. 
NWvcov <- NeweyWest(day_pollutect, lag = NULL, prewhite = FALSE)
coeftest(day_pollutect, NWvcov) 
sect7 <- coeftest(day_pollutect, m.vcov)

# LIFE SATISFACTION
life_satct <- lm(life_sat
                 ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp +  national + age, data=p2pp)
m.vcov <-cluster.vcov(life_satct, p2pp$day)  
coeftest(life_satct, m.vcov) # coeftest
# Robust
NWvcov <- NeweyWest(life_satct, lag = NULL, prewhite = FALSE)
coeftest(life_satct, NWvcov) 
sect8 <- coeftest(life_satct, m.vcov)

########### ########### ###########
### Post-Parade Table
########### ########### ###########

cov.labs.c <-c("AQI", "AQI Lagged", "AQI Lagged 2","Hukou Status", "Regime Insider", "SOE Employee", "Education","Children","Married","Income", "Female", "CCP Member","Holiday","Age")


# TABLE A13
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/postparade.tex")
stargazer(l_satct, c_satct, l_watchct, c_watchct,  anti_westct, font.size = "large", digits = 4, 
          column.labels = column.labels, 
          covariate.labels = cov.labs.c,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          star.cutoffs = c(.10, .05, .01), omit = c("Constant", "parade"),
          se = list(SE.1[,2], SE.2[,2], SE.3[, 2], SE.4[, 2],  SE.6[, 2]),
          header = F, notes = "robust standard errors clustered at the day level in parentheses", float = F)
sink()


### check_air ###
checkair <- lm(check_air ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2)
m.vcov <- cluster.vcov(checkair, p2$day)
sec <- coeftest(checkair, m.vcov)


sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/CHECK.tex")
## TABLE A16
stargazer(checkair, font.size = "large", digits = 4, 
          column.labels = "Frequency of Checking AQI", 
          covariate.labels = c("AQI", "AQI.lagged", "AQI.lagged2", "Parade Period", "Holiday"),
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01),
          se = list(sec[, 2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

####################################
####################################
####   TJ DUMMY ##
####################################
####################################

# aqi and government satisfaction at the local level
l_satc <- lm(l_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov1 <-cluster.vcov(l_satc, p2$day)  
coeftest(l_satc, m.vcov1) # coeftest ( )
se11 <- coeftest(l_satc, m.vcov1)
#  
NWvcov <- NeweyWest(l_satc, lag = NULL, prewhite = FALSE)
sen11 <- coeftest(l_satc, NWvcov) #   at .068


# aqi and c_sat
c_satc <- lm(c_sat
             ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
             + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov25 <-cluster.vcov(c_satc, p2$day)  
coeftest(c_satc, m.vcov25) 
se22 <- coeftest(c_satc, m.vcov25)
NWvcov <- NeweyWest(c_satc, lag = NULL, prewhite = FALSE)
sen22 <- coeftest(c_satc, NWvcov) #   p = .0504604

# aqi and the demand for more watchdog at the local level
l_watchc <- lm(l_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov13 <-cluster.vcov(l_watchc, p2$day)  
coeftest(l_watchc, m.vcov13) # coeftest
se33 <- coeftest(l_watchc, m.vcov13)
NWvcov <- NeweyWest(l_watchc, lag = NULL, prewhite = FALSE)
sen33 <- coeftest(l_watchc, NWvcov) #   p = .022793

# aqi and the demand for more watchdog at the central level 
c_watchc <- lm(c_watch
               ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
               + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov14 <-cluster.vcov(c_watchc, p2$day)  
coeftest(c_watchc, m.vcov14) # coeftest
se44 <- coeftest(c_watchc, m.vcov14)
NWvcov <- NeweyWest(c_watchc, lag = NULL, prewhite = FALSE)
sen44 <- coeftest(c_watchc, NWvcov) # not   but p = .102995

# aqi and anti-west sentiment
anti_westc <- lm(anti_west
                 ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov25 <-cluster.vcov(anti_westc, p2$day) 
coeftest(anti_westc, m.vcov25) 
se66 <- coeftest(anti_westc, m.vcov25) 
NWvcov <- NeweyWest(anti_westc, lag = NULL, prewhite = FALSE)
sen66 <- coeftest(anti_westc, NWvcov) # not   but p = .1418136


# aqi and life stisfaction
life_satc <- lm(life_sat
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp + parade + national + tj + age, data=p2)
m.vcov25 <-cluster.vcov(life_satc, p2$day)  
coeftest(life_satc, m.vcov25) 
se88 <- coeftest(life_satc, m.vcov25) 
NWvcov <- NeweyWest(life_satc, lag = NULL, prewhite = FALSE)
sen88 <- coeftest(life_satc, NWvcov) #   p = .0102928



# covariate labels for the expanded list, 
cov.labs.c <-c("AQI", "AQI Lagged", "AQI Lagged 2","Hukou Status", "Regime Insider", "SOE Employee", "Education","Children","Married","Income", "Female", "CCP Member","Parade Period","Holiday","Tianjin", "Age")
column.labels <- c("Local Government", "Central Government", "Local Oversight", "Central Oversight", 
                     "Anti-West")

####################
## Tianjin  TABLE
####################
## TABLE A9
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/tj.tex")
stargazer(l_satc, c_satc, l_watchc, c_watchc,  anti_westc, font.size = "large", digits = 4, 
          column.labels = column.labels,
          covariate.labels = cov.labs.c,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(se11[,2], se22[,2], se33[, 2], se44[, 2],  se66[, 2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()


##############################
###  AQI Figures - RAW and Residualized
##############################
collapse1 <- summaryBy(aqio + aqio.lagged + aqio.lagged2 ~ day,data=p2, FUN = mean)
colnames(p2)

###### RAW FIGURE
# Add month and day labels to the following ggplot figure
Aug <- vector(mode = "character", length = 18)
for (i in 14:31) {
  if (i == 14) {
  Aug[[i-(14-1)]] <- c(paste("Aug", i))
  } else {
    if (i == 20) {
    Aug[[i-(14-1)]] <- c(paste("Aug", i))
    } else {
      Aug[[i-(14-1)]] <- c("")
  }
  }
}
Aug

Sep <- vector(mode = "character", length = 30)
for (i in 1:30) {
  if (i == 3){
    Sep[[i]] <- c(paste("Sep", i))
  } else {
    Sep[[i]] <- c("")
  }
  
}
Sep

Oct <- vector(mode = "character", length = 9)
for (i in 1:9){
  if (i == 9) {
    Oct[[i]] <- c(paste("Oct", i))
  } else {
    Oct[[i]] <- c("")
  }
  
}
Oct

c(unlist(Aug), unlist(Sep), unlist(Oct))


setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
# aqi over time, white lane
pdf(file = "raw_new.pdf", height = 8, width = 8)
ggplot(collapse1,aes(x=day,y=aqio.mean)) + geom_line() + 
  geom_vline(xintercept = 7, colour = "red", linetype = "longdash") + 
  geom_vline(xintercept = 21, colour = "red", linetype = "longdash") + theme_bw() +
  ggtitle("Raw AQI by Day") + ylab("Daily AQI") + xlab("Day") +
  scale_x_continuous(breaks = 1:57, labels= c(unlist(Aug), unlist(Sep), unlist(Oct))) +
  theme(plot.title = element_text(hjust = .5, size = 18, face = "bold"),
        axis.text.x = element_text(angle = 90, hjust = 1),
        axis.title=element_text(size=16))
dev.off()

#### RESIDUALIZED FIGURE

# Creating a variable for residualized AQI
collapse1$res <- lm(aqio.mean ~ aqio.lagged.mean + aqio.lagged2.mean, data = collapse1)$residuals 

# Plotting residualized AQI
# Setting the directory to the figures folder
setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
# Plotting it time-series style
# residualized aqi over time, white lane
## FIGURE A4 
pdf("res_AQI.pdf", height=8, width=8)
ggplot(collapse1,aes(x=day,y=res)) + geom_line() + 
  geom_vline(xintercept = 7, colour = "red", linetype = "longdash") + 
  geom_vline(xintercept = 21, colour = "red", linetype = "longdash") + theme_bw() +
  ggtitle("Raw AQI by Day") + ylab("Daily AQI") + xlab("Day") +
  scale_x_continuous(breaks = 1:57, labels= c(unlist(Aug), unlist(Sep), unlist(Oct))) +
  theme(plot.title = element_text(hjust = .5, size = 18, face = "bold"),
        axis.text.x = element_text(angle = 90, hjust = 1),
        axis.title=element_text(size=16))
dev.off()

### HISTOGRAM OF AQI ###
## FIGURE A2 
setwd("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Figures")
pdf("AQI_hist.pdf", height = 8, width=8)
hist(collapse1$aqio.mean, xlab = "Daily AQI", ylab ="Number of Days",breaks = 20,xlim=c(0,350), main ="Histogram of Daily AQI")
dev.off()

# ###########################
# ##### ADDITIONAL ROBUSTNESS CHECK
# ##### HOLIDAYS TABLE
# ###########################

l_sath <- lm(l_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(l_sath, p2$day)
coeftest(l_sath, m.vcov) 

c_sath <- lm(c_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(c_sath, p2$day)
coeftest(c_sath, m.vcov) 

l_watchh <- lm(l_watch ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(l_watchh, p2$day)
coeftest(l_watchh, m.vcov) #  


c_watchh <- lm(c_watch ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(c_watchh, p2$day)
coeftest(c_watchh, m.vcov) 



anti_westh <- lm(anti_west ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(anti_westh, p2$day)
coeftest(anti_westh, m.vcov) 

life_sath <- lm(life_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + holiday, data = p2) #  
m.vcov <- cluster.vcov(life_sath, p2$day)
coeftest(life_sath, m.vcov) 

# aqi and government satisfaction at the local level
l_satch <- lm(l_sat
              ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
              + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov1 <-cluster.vcov(l_satch, p2$day)  
seh1 <- coeftest(l_satch, m.vcov1) # coeftest ( )


# aqi and c_sat
c_satch <- lm(c_sat
              ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
              + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov25 <-cluster.vcov(c_satch, p2$day)  
seh2 <- coeftest(c_satch, m.vcov25) 

# aqi and the demand for more watchdog at the local level
l_watchch <- lm(l_watch
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov13 <-cluster.vcov(l_watchch, p2$day)  
seh3 <- coeftest(l_watchch, m.vcov13) # coeftest

# aqi and the demand for more watchdog at the central level 
c_watchch <- lm(c_watch
                ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov14 <-cluster.vcov(c_watchch, p2$day)  
seh4 <- coeftest(c_watchch, m.vcov14) # coeftest

# aqi and anti-west sentiment
anti_westch <- lm(anti_west
                  ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                  + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov25 <-cluster.vcov(anti_westch, p2$day)  
seh6 <- coeftest(anti_westch, m.vcov25) 

# aqi and life stisfaction
life_satch <- lm(life_sat
                 ~ aqi + aqi.lagged + aqi.lagged2 + hukou + insider + soe + educ 
                 + kids + married + income + gender + ccp + parade + holiday + national + age, data=p2)
m.vcov25 <-cluster.vcov(life_satch, p2$day)  
seh8 <- coeftest(life_satch, m.vcov25) 

# covariate labels for the expanded list, 
cov.labs.c <-c("AQI", "AQI Lagged", "AQI Lagged 2","Hukou Status", "Regime Insider", "SOE Employee", "Education","Children","Married","Income", "Female", "CCP Member","Parade Period", "Weekends", "Holiday", "Age")

############################################
# TABLE --  HOLIDAY and FULL CONTROLS
############################################

column.labels <- c("Local Government", "Central Government", "Local Oversight", "Central Oversight",   "Anti-West")

## TABLE A12
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/holiday.tex")
stargazer(l_satch, c_satch, l_watchch, c_watchch,  anti_westch, font.size = "large", digits = 4, 
          column.labels = column.labels,
          covariate.labels = cov.labs.c,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(seh1[,2], seh2[,2], seh3[, 2], seh4[, 2],  seh6[, 2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

############################################
### NEIGHBOR TRUST and life satisfaction table ###
############################################

life_sat1 <- lm(life_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2)
m.vcovl1 <- cluster.vcov(life_sat1, p2$day)
selife_sat1 <- coeftest(life_sat1, m.vcovl1)

life_sat2 <- lm(life_sat ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + hukou + insider + soe + kids + married + gender + ccp + age + income, data = p2)
m.vcovl2 <- cluster.vcov(life_sat2, p2$day)
selife_sat2 <- coeftest(life_sat2, m.vcovl2)

nghbr_trust1 <- lm(nghbr_trust ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data = p2)
m.vcov1 <- cluster.vcov(nghbr_trust1, p2$day)
senghbr1 <- coeftest(nghbr_trust1, m.vcov1)

nghbr_trust2 <- lm(nghbr_trust ~ aqi + aqi.lagged + aqi.lagged2 + parade + national + hukou + insider + soe + kids + married + gender + ccp + age + income, data = p2)
m.vcov2 <- cluster.vcov(nghbr_trust2, p2$day)
senghbr2 <- coeftest(nghbr_trust2, m.vcov2)

fit.list <- list(life_sat1, life_sat2, nghbr_trust1,nghbr_trust2)
ses <- list(selife_sat1[,2], selife_sat2[,2], senghbr1[,2],senghbr2[,2])
covars <- c("AQI", "AQI Lagged", "AQI Lagged 2","Parade Period","Holiday","Hukou","Regime Insider","SOE Employee","Children","Married","Female","CCP","Age","Income")

sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/nghbr.trust.tex")
stargazer(fit.list, font.size = "large", digits = 4, 
          column.labels = c("Life satisfaction", "Life satisfaction", "Neighbor trust", "Neighbor trust"),
          covariate.labels = covars,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01),
          se = ses,
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

########## ######### ########
## Now a Table For Life Sat Only
########## ######### ########

fit.list <- list(life_sat1, life_sat2)
ses <- list(selife_sat1[,2], selife_sat2[,2])
covars <- c("AQI", "AQI Lagged", "AQI Lagged 2","Parade Period","Holiday","Hukou","Regime Insider","SOE Employee","Children","Married","Female","CCP","Age","Income")
## TABLE A18
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/life_sat_only.tex")
stargazer(fit.list, font.size = "large", digits = 4, 
          column.labels = c("Life satisfaction", "Life satisfaction"),
          covariate.labels = covars,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01),
          se = ses,
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()

######################################
######################################
### Balance table ###
######################################
######################################

# running regressions of every single covariate on aqi + aqi.lagged + aqi.lagged2 + parade + national

hukou <- lm(hukou
            ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(hukou, p2$day)  
a <- coeftest(hukou, m.vcov1) # coeftest

insider <- lm(insider
              ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(insider, p2$day)  
b <- coeftest(insider, m.vcov1) # coeftest

soe <- lm(soe
          ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(soe, p2$day)  
c <- coeftest(soe, m.vcov1) # coeftest

educ <- lm(educ
           ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(educ, p2$day)  
d <- coeftest(educ, m.vcov1) # coeftest

kids <- lm(kids
           ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(kids, p2$day)  
e <- coeftest(kids, m.vcov1) # coeftest

married <- lm(married
              ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(married, p2$day)  
f <- coeftest(married, m.vcov1) # coeftest

income <- lm(income
             ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(income, p2$day)  
g <- coeftest(income, m.vcov1) # coeftest

gender <- lm(gender
             ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(gender, p2$day)  
h <- coeftest(gender, m.vcov1) # coeftest

ccp <- lm(ccp
          ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(ccp, p2$day)  
i <- coeftest(ccp, m.vcov1) # coeftest

age <- lm(age
          ~ aqi + aqi.lagged + aqi.lagged2 + parade + national, data=p2)
m.vcov1 <-cluster.vcov(age, p2$day)  
j <- coeftest(age, m.vcov1) # coeftest


column.labels <- c("Hukou Status", "Regime Insider", "SOE Employee", "Education","Children","Married","Income", "Female", "CCP Member","Age")

cov.labels <- c("AQI", "AQI Lagged", "AQI Lagged 2","Parade Period","Holiday")

#####################################
### Balance Tables
#####################################

## TABLE A17
sink("~/Dropbox/Pollution and Public Perceptions in China/Manuscript/Tables/bal.regs.tex")
stargazer(hukou,insider,soe,educ,kids,married,income,gender,ccp,age,
          font.size = "large", digits = 4,
          column.labels = column.labels,
          covariate.labels = cov.labels,
          dep.var.labels.include = FALSE,
          model.numbers = FALSE,
          style = "qje", no.space = FALSE, keep.stat = c("n"),
          omit ="Constant",
          star.cutoffs = c(.10, .05, .01), 
          se = list(a[,2], b[,2], c[, 2], d[, 2], e[, 2], f[, 2],
                    g[, 2],h[, 2],i[ ,2],j[ ,2]),
          header = F, notes = "robust standard errors clustered by day in parentheses", float = F)
sink()